#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
================================================================================
Programmer:     Jonathan E. Landrum
Program:        complaint-scraper.py
Description:    Scrapes phone number complaints from a website and makes the
                data available via a RESTful API.
Dependencies:   bs4
                bottle
                bottle.ext.sqlite
                cfscrape
                json
                lxml
                os
                socket
                socks
                sqlite3
                sys
References:     https://deshmukhsuraj.wordpress.com/2015/03/08/anonymous-web-
                    scraping-using-python-and-tor/
                http://www.gregreda.com/2013/03/03/web-scraping-101-with-python/
                http://docs.python-guide.org/en/latest/scenarios/scrape/
                https://github.com/Anorov/cloudflare-scrape
Usage:          Run the script with `python complaint-scraper.py`
                Visit `http://localhost:8080` to view results
                Routes defined:
                    /       Returns all results in the database
                    /123    Returns the results for this area code only, where
                            123 is some valid area code
Version:        1.3
================================================================================
Changelog:
Time                Note                                                Version
--------------------------------------------------------------------------------
14 Dec 15 13:00 CST Added route for favicon.ico to remove errors        1.3
14 Dec 15 11:30 CST Refactored purge() and getComments()                1.2
14 Dec 15 11:00 CST Modified the API to be more user-friendly           1.1
12 Dec 15 03:30 CST Completed the API                                   1.0
12 Dec 15 02:15 CST Successfully getting JSON results from the API      0.4
11 Dec 15 23:45 CST Successfully storing results in the database        0.3
11 Dec 15 22:30 CST Successfully pulling data from target site          0.2
11 Dec 15 19:00 CST Testing database connection                         0.1
================================================================================
"""

from bottle import response, route, run
from bs4 import BeautifulSoup
from lxml import html
import bottle
import bottle.ext.sqlite
import cfscrape
import json
import os
import socket
import socks
import sqlite3 as sql
import sys

# Application object and sqlite plugin. The plugin injects a database
# handle into any route callback that declares a `db` keyword argument.
# NOTE(review): no route below declares `db` — every handler opens its own
# sqlite3 connection — so the plugin appears unused; confirm before removing.
app = bottle.Bottle()
plugin = bottle.ext.sqlite.Plugin(dbfile='complaint-scraper.db')
app.install(plugin)

def touch():
    """Ensure the sqlite database file exists, creating an empty one if not."""
    if not os.path.exists('complaint-scraper.db'):
        # Opening in write mode creates the file; the context manager
        # closes it immediately, leaving an empty file behind.
        with open('complaint-scraper.db', 'w'):
            pass

def init():
    """Create the Comments table if it does not already exist.

    ``sqlite3.connect`` creates the database file on demand, so the
    separate ``touch()`` existence check the old version did is not
    needed here. Errors are reported to stdout rather than raised,
    matching the rest of the module.
    """
    try:
        # The connection used as a context manager commits on success and
        # rolls back on exception; Connection.execute makes an implicit
        # cursor (the old `with con as cur` actually yielded the
        # connection itself, not a cursor).
        with sql.connect('complaint-scraper.db') as con:
            con.execute('''
                CREATE TABLE IF NOT EXISTS Comments(
                    ID INTEGER PRIMARY KEY AUTOINCREMENT,
                    Area_Code INTEGER,
                    Full_Number INTEGER,
                    Comment TEXT,
                    Num_Comments INTEGER)
                ''')
    except sql.Error as e:
        # `except X, e` / `print x` are Python-2-only; this form runs on
        # Python 2.6+ and Python 3.
        print("Error %s:" % e.args[0])

def purge():
    """Drop the Comments table, then recreate it empty via init().

    ``sqlite3.connect`` creates the database file if it is missing, so
    no separate ``touch()`` call is required before connecting.
    """
    try:
        # Context-managed connection commits the DROP on success.
        with sql.connect('complaint-scraper.db') as con:
            con.execute('''DROP TABLE IF EXISTS Comments''')

        # Rebuild an empty table so callers always see a valid schema.
        init()
    except sql.Error as e:
        # Python 2/3 compatible exception syntax (was `except sql.Error, e:`).
        print("Error %s:" % e.args[0])

def scrape():
    """Fetch the 800notes.com front page and store complaint previews.

    Drops and recreates the Comments table first (via purge()), so each
    call replaces the stored data with a fresh snapshot. The request
    goes through cfscrape to pass the site's Cloudflare challenge.
    Database errors are printed rather than raised.
    """
    try:
        purge()

        # Fetch and parse the front page.
        scraper = cfscrape.create_scraper()
        page = scraper.get('http://800notes.com/').content
        soup = BeautifulSoup(page, 'lxml')

        with sql.connect('complaint-scraper.db') as con:
            for div in soup.findAll('div', class_='oos_preview'):
                cnt = div.find('div', class_='oos_previewSide')   # comment count
                wrp = div.find('div', class_='oos_previewMain')
                num = wrp.find('div', class_='oos_previewHeader')
                lnk = num.find('a', class_='oos_previewTitle')
                txt = wrp.find('div', class_='oos_previewBody')   # comment text
                # Assumes the link text looks like "123-456-7890": the
                # slices pick out the three digit groups — TODO confirm
                # the site's number format.
                areaCode = lnk.text[:3]
                fullNmbr = areaCode + lnk.text[4:7] + lnk.text[8:]
                # Parameterized insert; the connection context manager
                # commits all rows at the end of the `with` block.
                con.execute('''
                    INSERT INTO Comments(
                        Area_Code, Full_Number, Comment, Num_Comments)
                    VALUES(?,?,?,?)
                    ''', (areaCode, fullNmbr, txt.text, cnt.text))

    except sql.IntegrityError as e:
        print("Error: %s" % e.args[0])
    except sql.Error as e:
        # Python 2/3 compatible exception syntax (was `except sql.Error, e:`).
        print("Error: %s" % e.args[0])

@app.route('/favicon.ico')
def favicon():
    """Answer browser favicon requests with an empty body to avoid 404 noise."""
    return ''

@app.route('/', method='GET')
@app.route('/<ac>', method='GET')
@app.route('/<ac>/', method='GET')
def get(ac=''):
    """Return scraped complaints as a JSON array.

    Args:
        ac: Optional area code taken from the URL path. When empty, all
            rows are returned; otherwise only rows whose Area_Code
            matches.

    Re-scrapes the site on every request, so responses always reflect a
    fresh snapshot (NOTE(review): this makes each request slow and
    hammers the target site — consider caching).
    """
    scrape()
    response.content_type = 'application/json'
    result = []
    try:
        with sql.connect('complaint-scraper.db') as con:
            # Parameterized query: the previous version concatenated the
            # URL-supplied `ac` straight into the SQL string, an SQL
            # injection vector.
            if ac:
                rows = con.execute(
                    'SELECT * FROM Comments WHERE Area_Code = ?', (ac,))
            else:
                rows = con.execute('SELECT * FROM Comments')
            for row in rows:
                result.append({
                    'Area Code': row[1],
                    'Full Number': row[2],
                    'Comment': row[3],
                    'Number of Comments': row[4],
                    })
        return json.dumps(result)
    except sql.IntegrityError as e:
        print("Error: %s" % e.args[0])
    except sql.Error as e:
        # Python 2/3 compatible exception syntax (was `except sql.Error, e:`).
        print("Error: %s" % e.args[0])

# Start Bottle's built-in development server, blocking until interrupted.
# debug=True includes tracebacks in error responses — not for production use.
app.run(host='localhost', port=8080, debug=True)
